Settings
source("scripts/utils.R")
source("scripts/settings.R")
load("data/processed/rna_dynamics.RData")
load("data/processed/chip_dynamics.RData")
load(paste0("data/processed/h3k4me3_data_sperm_", sperm_data, ".RData"))
Bind expression and chip data
chip_dfs[["Spermatid"]]$expression <- log(expression.data[["Spermatid"]] + 1)
chip_dfs[["ZGA"]]$expression.6hpf <- log(expression.data[["6hpf"]] + 1)
chip_dfs[["ZGA"]]$expression.7hpf <- log(expression.data[["7hpf"]] + 1)
chip_dfs[["ZGA"]]$expression.8hpf <- log(expression.data[["8hpf"]] + 1)
chip_dfs[["ZGA"]]$expression.9hpf <- log(expression.data[["9hpf"]] + 1)
chip_dfs[["Spermatid"]]$expression[is.na(chip_dfs[["Spermatid"]]$expression)] <- 0
chip_dfs[["ZGA"]]$expression.6hpf[is.na(chip_dfs[["ZGA"]]$expression.6hpf)] <- 0
chip_dfs[["ZGA"]]$expression.7hpf[is.na(chip_dfs[["ZGA"]]$expression.7hpf)] <- 0
chip_dfs[["ZGA"]]$expression.8hpf[is.na(chip_dfs[["ZGA"]]$expression.8hpf)] <- 0
chip_dfs[["ZGA"]]$expression.9hpf[is.na(chip_dfs[["ZGA"]]$expression.9hpf)] <- 0
Balloon plots
plot_balloonplot(rna_dyns[names(rna_dyns) != "ND"], chip_dyns[names(chip_dyns) != "Absent"], limit=50, x_caption="RNA expression", y_caption="H3K4me3")

ggsave('output/rna_chip/rna_chip_balloon_plot.pdf', width=5, height=3)
plot_balloonplot(rna_dyns_timing, chip_dyns, limit=100, x_caption="RNA expression", y_caption="H3K4me3", remove_dyns=c("NonZygotic", "Absent"))

ggsave('output/rna_chip/rna_chip_timing_balloon_plot.pdf', width=5, height=3)
rna_small <- rna_dyns[!names(rna_dyns) %in% c("ND")]
chip_small <- chip_dyns[!names(chip_dyns) %in% c("Absent")]
genes_of_interest <- unique(c(unlist(rna_small), unlist(chip_small)))
cols = lapply(c(rna_small, chip_small), function(dyn) genes_of_interest %in% dyn)
df <- data.frame(cols)
metadata <- data.frame(set = c(names(rna_small), names(chip_small)), type = c(rep("rna", length(rna_small)), rep("chip", length(chip_small))))
upset(
df,
colnames(df),
name="dynamic",
width_ratio=0.15,
stripes=upset_stripes(
mapping=aes(color=type),
colors=c(
'rna'='green',
'chip'='orange'
),
data=metadata
)
)

Peak line plots
for(stage in stages){
plot_peak(stage=stage, dyns=rna_dyns, cols=cols_rna, remove_NA=TRUE, onlypeaks=FALSE)
ggsave(file=paste0("output/rna_chip/", stage, "_rna_peak_plot.pdf"), width = 5, height = 4)
}
for(stage in stages){
plot_peak(stage=stage, dyns=rna_dyns_timing, cols=cols_rna_timing, remove_NA=TRUE, onlypeaks=FALSE)
ggsave(file=paste0("output/rna_chip/", stage, "_rna_timing_peak_plot.pdf"), width = 5, height = 4)
}
Euler / Venns of overlaps
genesAbsolute <- lapply(chip_dyns, function(x){as.data.frame(lapply(rna_dyns, function(y){length(intersect(x, y))}))})
genesAbsolute <- do.call(rbind, genesAbsolute)
# all genes
plot(venn(c("Kept"=length(chip_dyns$Kept), "GZ"=length(rna_dyns$GZ), "Kept&GZ"=length(intersect(chip_dyns$Kept, rna_dyns$GZ)))),
quantities = TRUE,fills=c(cols_chip["Kept"], cols_rna["GZ"], "#fa193b"))

plot(euler(c("Kept"=length(chip_dyns$Kept), "GZ"=length(rna_dyns$GZ), "Kept&GZ"=length(intersect(chip_dyns$Kept, rna_dyns$GZ)))),
quantities = TRUE,fills=c(cols_chip["Kept"], cols_rna["GZ"], "#fa193b"))

# only genes where both chip & rna dynamics are known
euler_kept_gz <- function() {plot(euler(c("Kept"=sum(genesAbsolute["Kept",])-sum(genesAbsolute["Kept","GZ"]), "GZ"=sum(genesAbsolute[,"GZ"])-sum(genesAbsolute["Kept","GZ"]) , "Kept&GZ"=sum(genesAbsolute["Kept","GZ"]))),
quantities = TRUE,fills=c(cols_chip["Kept"], cols_rna["GZ"], "#fa193b"))}
plot(venn(c("Kept"=sum(genesAbsolute["Kept",])-sum(genesAbsolute["Kept","GZ"]), "GZ"=sum(genesAbsolute[,"GZ"])-sum(genesAbsolute["Kept","GZ"]) , "Kept&GZ"=sum(genesAbsolute["Kept","GZ"]))),
quantities = TRUE,fills=c(cols_chip["Kept"], cols_rna["GZ"], "#fa193b"))

euler_kept_gz()

pdf('output/rna_chip/kept_gz_overlap_euler.pdf', width=4, height=3)
euler_kept_gz()
dev.off()
## quartz_off_screen
## 2
Stacked bar of H3K4me3 ratios
cols_mod <- c(cols_chip[!names(cols_chip)%in%c("Recovered", "Lost", "Gained")], "Other"="darkgrey")
dyns_mod = chip_dyns[c("Kept", "Absent")]
dyns_mod[["Other"]] = unname(unlist(chip_dyns[c("Recovered", "Lost", "Gained")]))
genesAbsolute_mod = genesAbsolute[c("Kept", "Absent"),]
genesAbsolute_mod["Other", ] <- colSums(genesAbsolute[c("Recovered", "Lost", "Gained"),])
ggdf1 <- melt(cbind(genesAbsolute_mod, rownames(genesAbsolute_mod)))
## Using rownames(genesAbsolute_mod) as id variables
names(ggdf1)<- c("H3K4me3", "RNA", "count")
ggdf1$H3K4me3 <- factor(ggdf1$H3K4me3, levels=c("Kept", "Other", "Absent"))
ggplot(ggdf1, aes(fill=H3K4me3, y=count, x=RNA)) + geom_bar(position="fill", stat="identity") + theme_bw() + ylab("%") + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1), axis.title.x = element_blank(), legend.title=element_blank()) + scale_fill_manual(values = cols_mod) + theme_pubr()

ggsave('output/rna_chip/ratio_h3k4me3_rna_dynamics_stacked.pdf', width=5, height=4)
Violin plots of H3K4me3 for RNA groups
for(stage in stages){
g <- plot_stage_dynamics(chip_dfs, dyns=rna_dyns, cols=cols_rna, timepoint=stage, type="promoter.mean", only_violin = TRUE)
grid.arrange(g)
ggsave(file=paste0("output/rna_chip/", stage, "_rna_dynamics_violin_plot.pdf"), g, width = 5, height = 4)
}




for(stage in stages){
g <- plot_stage_dynamics(chip_dfs, dyns=rna_dyns, cols=cols_rna, timepoint=stage, type="peak.width")
grid.arrange(g)
ggsave(file=paste0("output/rna_chip/", stage, "_peak_width_rna_dynamics_violin_ecdf.pdf"), g, width = 10, height = 5)
}




for(stage in stages){
g <- plot_stage_dynamics(chip_dfs, dyns=rna_dyns_timing, cols=cols_rna_timing, timepoint=stage, type="promoter.mean")
grid.arrange(g)
ggsave(file=paste0("output/rna_chip/", stage, "_rna_timing_violin_ecdf.pdf"), g, width = 10, height = 5)
}




for(stage in stages){
g <- plot_stage_dynamics(chip_dfs, dyns=rna_dyns_timing, cols=cols_rna_timing, timepoint=stage, type="peak.width")
grid.arrange(g)
ggsave(file=paste0("output/rna_chip/", stage, "_peak_width_rna_timing_violin_ecdf.pdf"), g, width = 10, height = 5)
}



